"""
Phase 4: Continuous Rating Model — PanelGLS
=============================================
Uses rating_numeric (21-point scale) as dependent variable.
Maximizes power by using all rating variation.

Output: table7_rating_panelgls.md, phase4_results.csv
"""

import sys
import numpy as np
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/safe_asset_cliff")
ROOT_DIR = PROJECT_DIR.parent
PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
TABLES_DIR = PROJECT_DIR / "output" / "tables"

sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS


def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.1: return '*'
    return ''


def fit_and_report(y, X, entity_ids, time_ids, feature_names, label):
    model = PanelGLS()
    model.fit(y, X, entity_ids, time_ids)
    print(f"\n{'=' * 70}")
    print(f"  {label}")
    print(f"  N={model.n_obs:,}, {model.n_countries} countries, "
          f"R²={model.r_squared:.4f}, rho={model.rho:.3f}")
    print(f"{'=' * 70}")
    model.summary(feature_names=feature_names)
    result_df = model.to_dataframe(feature_names=feature_names)
    result_df['model'] = label
    result_df['n_obs'] = model.n_obs
    result_df['n_countries'] = model.n_countries
    result_df['r_squared'] = model.r_squared
    result_df['rho'] = model.rho
    return model, result_df


def write_markdown_table(path, title, headers, rows, notes=None):
    lines = [f"### {title}", ""]
    lines.append("| " + " | ".join(headers) + " |")
    lines.append("|" + "|".join(["--:" if i > 0 else ":--" for i in range(len(headers))]) + "|")
    for row in rows:
        lines.append("| " + " | ".join(str(c) for c in row) + " |")
    if notes:
        lines.append("")
        lines.append(f"*{notes}*")
    lines.append("")
    path.write_text("\n".join(lines), encoding="utf-8")
    print(f"  Saved: {path}")


def extract_coef(model, feature_names, var):
    idx = feature_names.index(var)
    return model.beta[idx], model.se[idx], model.pvalues[idx]


def main():
    print("=" * 70)
    print("PHASE 4: Continuous Rating Model — PanelGLS")
    print("=" * 70)

    df = pd.read_csv(PROCESSED_DIR / "cliff_panel.csv")
    print(f"Loaded: {df['iso3'].nunique()} countries, {len(df):,} obs")

    all_results = []
    table_rows = []

    demo_vars = ['Z_1', 'Z_2', 'Z_3']
    controls_base = ['rgdp_growth', 'inflation', 'kaopen']
    controls_base = [c for c in controls_base if c in df.columns]

    # ── Spec 1: Z₁, Z₂, Z₃ + controls ──
    vars1 = demo_vars + controls_base
    est = df.dropna(subset=['rating_numeric'] + vars1)
    if len(est) >= 100:
        label = "1. Demographics Z + controls"
        m, r = fit_and_report(
            est['rating_numeric'].values, est[vars1].values,
            est['iso3'].values, est['year'].values, vars1, label)
        all_results.append(r)
        c, se, p = extract_coef(m, vars1, 'Z_1')
        table_rows.append([label, f"{c:.2f}{stars(p)}", f"({se:.2f})",
                           "-", "-", f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Spec 2: OADR + controls ──
    vars2 = ['old_dep'] + controls_base
    est = df.dropna(subset=['rating_numeric'] + vars2)
    if len(est) >= 100:
        label = "2. OADR + controls"
        m, r = fit_and_report(
            est['rating_numeric'].values, est[vars2].values,
            est['iso3'].values, est['year'].values, vars2, label)
        all_results.append(r)
        c, se, p = extract_coef(m, vars2, 'old_dep')
        table_rows.append([label, f"{c:.2f}{stars(p)}", f"({se:.2f})",
                           "-", "-", f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Spec 3: OADR spline at 20% ──
    vars3 = ['old_dep', 'oadr_spline_20'] + controls_base
    est = df.dropna(subset=['rating_numeric'] + vars3)
    if len(est) >= 100:
        label = "3. OADR spline (20%)"
        m, r = fit_and_report(
            est['rating_numeric'].values, est[vars3].values,
            est['iso3'].values, est['year'].values, vars3, label)
        all_results.append(r)
        c_base, se_base, p_base = extract_coef(m, vars3, 'old_dep')
        c_spline, se_spline, p_spline = extract_coef(m, vars3, 'oadr_spline_20')
        table_rows.append([label,
                           f"{c_base:.2f}{stars(p_base)}", f"({se_base:.2f})",
                           f"{c_spline:.2f}{stars(p_spline)}", f"({se_spline:.2f})",
                           f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Spec 4: Lagged fiscal stress ──
    fiscal_vars = ['exp_rev_gap_lag', 'debt_change_5y']
    fiscal_avail = [v for v in fiscal_vars if v in df.columns]
    vars4 = ['old_dep'] + fiscal_avail + controls_base
    est = df.dropna(subset=['rating_numeric'] + vars4)
    if len(est) >= 100:
        label = "4. OADR + lagged fiscal stress"
        m, r = fit_and_report(
            est['rating_numeric'].values, est[vars4].values,
            est['iso3'].values, est['year'].values, vars4, label)
        all_results.append(r)
        c, se, p = extract_coef(m, vars4, 'old_dep')
        table_rows.append([label, f"{c:.2f}{stars(p)}", f"({se:.2f})",
                           "-", "-", f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Spec 5: Predetermined OADR ──
    if 'oadr_plus10' in df.columns:
        vars5 = ['oadr_plus10'] + controls_base
        est = df.dropna(subset=['rating_numeric'] + vars5)
        if len(est) >= 100:
            label = "5. Predetermined OADR+10"
            m, r = fit_and_report(
                est['rating_numeric'].values, est[vars5].values,
                est['iso3'].values, est['year'].values, vars5, label)
            all_results.append(r)
            c, se, p = extract_coef(m, vars5, 'oadr_plus10')
            table_rows.append([label, f"{c:.2f}{stars(p)}", f"({se:.2f})",
                               "-", "-", f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Spec 6: Dynamic (lagged rating) ──
    df['rating_numeric_lag'] = df.groupby('iso3')['rating_numeric'].shift(1)
    vars6 = ['rating_numeric_lag', 'old_dep'] + fiscal_avail + controls_base
    est = df.dropna(subset=['rating_numeric'] + vars6)
    if len(est) >= 100:
        label = "6. Dynamic (lagged rating)"
        m, r = fit_and_report(
            est['rating_numeric'].values, est[vars6].values,
            est['iso3'].values, est['year'].values, vars6, label)
        all_results.append(r)
        c_lag, se_lag, p_lag = extract_coef(m, vars6, 'rating_numeric_lag')
        c_oadr, se_oadr, p_oadr = extract_coef(m, vars6, 'old_dep')
        table_rows.append([label,
                           f"{c_oadr:.2f}{stars(p_oadr)}", f"({se_oadr:.2f})",
                           f"lag_rat: {c_lag:.2f}{stars(p_lag)}", f"({se_lag:.2f})",
                           f"{m.n_obs:,}", f"{m.r_squared:.3f}"])

    # ── Write table ──
    if table_rows:
        write_markdown_table(
            TABLES_DIR / "table7_rating_panelgls.md",
            "Table 7: Continuous Rating Model — PanelGLS",
            ["Specification", "OADR/Z₁ Coef", "(SE)", "Spline/Other", "(SE)",
             "N", "R²"],
            table_rows,
            notes="Dependent variable: rating_numeric (21-point S&P scale). "
                  "PanelGLS with AR(1) correction."
        )

    # ── Save results ──
    if all_results:
        results_df = pd.concat(all_results, ignore_index=True)
        results_df.to_csv(TABLES_DIR / "phase4_results.csv", index=False)
        print(f"\n  Saved: {TABLES_DIR / 'phase4_results.csv'}")

    print("\n" + "=" * 70)
    print("Phase 4 complete.")
    print("=" * 70)


if __name__ == "__main__":
    main()
